# 1. Create the Spark SQL execution environment
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import col, row_number

spark = SparkSession.builder.master("local").appName("sql").getOrCreate()
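
# Local runs log verbosely by default; lowering the log level is optional
# but keeps the show() output below easier to read.
spark.sparkContext.setLogLevel("WARN")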

# Read data in JSON format.
# Spark infers the table schema from the JSON automatically; note that the
# reader expects line-delimited JSON (one object per line) by default.
students_df = spark.read.format("json").load("../../data/students.json")
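
# Quick check of the inferred schema; the exact columns depend on the
# contents of students.json.
students_df.printSchema()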

# Top 2 oldest students per class: number the rows within each clazz
# partition by age (descending), then keep row numbers 1 and 2.
students_df \
    .select("*", row_number().over(Window.partitionBy("clazz").orderBy(col("age").desc())).alias("r")) \
    .where(col("r") <= 2) \
    .show()
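
# The same top-N-per-group query can also be written in SQL against a temp
# view. This is an equivalent sketch, not part of the original example; the
# view name "students" is arbitrary.
students_df.createOrReplaceTempView("students")
spark.sql("""
    SELECT *
    FROM (
        SELECT *,
               row_number() OVER (PARTITION BY clazz ORDER BY age DESC) AS r
        FROM students
    ) ranked
    WHERE r <= 2
""").show()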

# withColumn: add a new column to the DataFrame

# Same idea using withColumn; where() also accepts a SQL expression string.
# This variant keeps the top 3 per class.
students_df \
    .withColumn("r", row_number().over(Window.partitionBy("clazz").orderBy(col("age").desc()))) \
    .where("r <= 3") \
    .show()
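
# row_number(), rank(), and dense_rank() differ when ages tie: row_number()
# breaks ties arbitrarily, rank() leaves gaps after ties, dense_rank() does
# not. A minimal comparison sketch; the extra imports and the helper
# variable w are only for this block.
from pyspark.sql.functions import dense_rank, rank

w = Window.partitionBy("clazz").orderBy(col("age").desc())
students_df \
    .withColumn("row_number", row_number().over(w)) \
    .withColumn("rank", rank().over(w)) \
    .withColumn("dense_rank", dense_rank().over(w)) \
    .show()

# Release the session's resources when done.
spark.stop()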